threads <- 4
pairs <- list(
"AR" = c("E2", "E2DHT"),
"H3K27ac" = c("E2", "E2DHT")
)
library(tidyverse)
library(glue)
library(pander)
library(yaml)
library(reactable)
library(plyranges)
library(rtracklayer)
library(VennDiagram)
library(UpSetR)
library(magrittr)
library(scales)
library(ggrepel)
library(rlang)
library(ggside)
library(msigdbr)
library(htmltools)
library(goseq)
library(parallel)
library(GenomicInteractions)
library(extraChIPs)
library(ggraph)
library(tidygraph)
source(here::here("workflow", "scripts", "custom_functions.R"))
panderOptions("big.mark", ",")
panderOptions("missing", "")
panderOptions("table.split.table", Inf)
theme_set(
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5)
)
)
config <- read_yaml(here::here("config", "config.yml"))
params <- read_yaml(here::here("config", "params.yml"))
targets <- names(pairs)
macs2_path <- here::here("output", "macs2", targets)
fdr_alpha <- config$comparisons$fdr
comps <- targets
if (length(unique(targets)) == 1) {
comps <- seq_along(pairs) %>%
vapply(
function(x) paste(
# names(pairs)[[x]],
paste(rev(pairs[[x]]), collapse = " Vs. ")#,
# sep = ": "
),
character(1)
)
}
n_targets = length(unique(names(pairs)))
samples <- file.path(macs2_path, "qc_samples.tsv") %>%
lapply(read_tsv) %>%
lapply(mutate_all, as.character) %>%
bind_rows() %>%
dplyr::filter(
qc == "pass",
(target == names(pairs)[[1]] & treat %in% pairs[[1]]) |
(target == names(pairs)[[2]] & treat %in% pairs[[2]])
) %>%
unite(label, target, label, remove = FALSE) %>%
mutate(
target = factor(target, levels = unique(targets)),
treat = factor(treat, levels = unique(unlist(pairs)))
) %>%
dplyr::select(-qc) %>%
droplevels()
stopifnot(nrow(samples) > 0)
rep_col <- setdiff(
colnames(samples), c("sample", "treat", "target", "input", "label", "qc")
)
samples[[rep_col]] <- as.factor(samples[[rep_col]])
annotation_path <- here::here("output", "annotations")
colours <- read_rds(file.path(annotation_path, "colours.rds"))
treat_colours <- unlist(colours$treat[levels(samples$treat)])
combs <- paste(
rep(comps, each = 4), c("Up", "Down", "Unchanged", "Undetected")
) %>%
combn(2) %>%
t() %>%
set_colnames(c("S1", "S2")) %>%
as_tibble() %>%
mutate(
C1 = str_remove(S1, " (Up|Down|Unchanged|Undetected)"),
C2 = str_remove(S2, " (Up|Down|Unchanged|Undetected)"),
) %>%
dplyr::filter(C1 != C2) %>%
unite(combn, S1, S2, sep = " - ") %>%
pull("combn")
direction_colours <- c(
'#e6194B', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#42d4f4', '#f032e6',
'#fabed4', '#469990', '#dcbeff', '#9A6324', '#fffac8', '#800000', '#aaffc3',
'#000075', '#a9a9a9'
) %>% # https://sashamaps.net/docs/resources/20-colors/
setNames(combs) %>%
c("Other" = "#4d4d4d")
comp_cols <- setNames(hcl.colors(3, "Zissou1", rev = TRUE), c(comps, "Both"))
cb <- config$genome$build %>%
str_to_lower() %>%
paste0(".cytobands")
data(list = cb)
bands_df <- get(cb)
sq <- read_rds(file.path(annotation_path, "seqinfo.rds"))
all_gr <- file.path(annotation_path, "all_gr.rds") %>%
read_rds()
id2gene <- setNames(all_gr$gene$gene_name, all_gr$gene$gene_id)
trans_models <- file.path(annotation_path, "trans_models.rds") %>%
read_rds()
genes_gr <- all_gr$gene
tss <- file.path(annotation_path, "tss.rds") %>%
read_rds()
blacklist <- here::here(annotation_path, "blacklist.bed.gz") %>%
import.bed(seqinfo = sq) %>%
sort()
external_features <- c()
has_features <- FALSE
if (!is.null(config$external$features)) {
external_features <- suppressWarnings(
import.gff(here::here(config$external$features), genome = sq)
)
keep_cols <- !vapply(
mcols(external_features), function(x) all(is.na(x)), logical(1)
)
mcols(external_features) <- mcols(external_features)[keep_cols]
has_features <- TRUE
feature_colours <- colours$features %>%
setNames(str_sep_to_title(names(.)))
}
gene_regions <- read_rds(file.path(annotation_path, "gene_regions.rds"))
regions <- vapply(gene_regions, function(x) unique(x$region), character(1))
region_colours <- unlist(colours$regions) %>% setNames(regions[names(.)])
rna_path <- here::here(config$external$rnaseq)
rnaseq <- tibble(gene_id = character(), gene_name = character())
if (length(rna_path) > 0) {
stopifnot(file.exists(rna_path))
if (str_detect(rna_path, "tsv$")) rnaseq <- read_tsv(rna_path)
if (str_detect(rna_path, "csv$")) rnaseq <- read_csv(rna_path)
if (!"gene_id" %in% colnames(rnaseq)) stop("Supplied RNA-Seq data must contain the column 'gene_id'")
genes_gr <- subset(genes_gr, gene_id %in% rnaseq$gene_id)
}
has_rnaseq <- as.logical(nrow(rnaseq))
tx_col <- intersect(c("tx_id", "transcript_id"), colnames(rnaseq))
rna_gr_col <- ifelse(length(tx_col) > 0, "transcript_id", "gene_id")
rna_col <- c(tx_col, "gene_id")[[1]]
hic <- GInteractions()
hic_path <- here::here(config$external$hic)
has_hic <- FALSE
if (length(hic_path) > 0)
if (file.exists(hic_path)) {
has_hic <- TRUE
hic <- makeGenomicInteractionsFromFile(hic_path, type = "bedpe")
reg_combs <- expand.grid(regions, regions) %>%
as.matrix() %>%
apply(
MARGIN = 1,
function(x) {
x <- sort(factor(x, levels = regions))
paste(as.character(x), collapse = " - ")
}
) %>%
unique()
hic$regions <- anchors(hic) %>%
vapply(
bestOverlap,
y = GRangesList(lapply(gene_regions, granges)),
character(length(hic))
) %>%
apply(MARGIN = 2, function(x) regions[x]) %>%
apply(
MARGIN = 1,
function(x) {
x <- sort(factor(x, levels = regions))
paste(as.character(x), collapse = " - ")
}
) %>%
factor(levels = reg_combs) %>%
fct_relabel(
str_replace_all,
pattern = "Promoter \\([0-9kbp/\\+-]+\\)", replacement = "Promoter"
)
if (has_features) {
feat_combs <- expand.grid(names(feature_colours), names(feature_colours)) %>%
as.matrix() %>%
apply(
MARGIN = 1,
function(x) {
x <- sort(factor(x, levels = names(feature_colours)))
paste(as.character(x), collapse = " - ")
}
) %>%
unique()
hic$features <- vapply(
anchors(hic),
function(x) bestOverlap(
x, external_features, var = "feature", missing = "no_feature"
),
character(length(hic))
) %>%
apply(
MARGIN = 1,
function(x) {
x <- sort(factor(x, levels = names(feature_colours)))
paste(as.character(x), collapse = " - ")
}
) %>%
factor(levels = feat_combs) %>%
fct_relabel(str_sep_to_title, pattern = "_")
}
}
stopifnot(is(hic, "GInteractions"))
seqlevels(hic) <- seqlevels(sq)
seqinfo(hic) <- sq
hic <- hic[!overlapsAny(hic, blacklist)]
oracle_peaks <- file.path(macs2_path, "oracle_peaks.rds") %>%
unique() %>%
sapply(read_rds) %>%
setNames(unique(basename(macs2_path)))
consensus_peaks <- file.path(macs2_path, "consensus_peaks.bed") %>%
lapply(import.bed, seqinfo = sq) %>%
setNames(basename(macs2_path))
all_consensus <- consensus_peaks %>%
GRangesList() %>%
unlist() %>%
GenomicRanges::reduce() %>%
sort()
db_results <- seq_along(pairs) %>%
lapply(
function(x) {
tg <- names(pairs)[[x]]
here::here(
"output", "differential_binding", tg,
glue("{tg}_{pairs[[x]][[1]]}_{pairs[[x]][[2]]}-differential_binding.rds")
)
}
) %>%
lapply(read_rds) %>%
lapply(mutate, fdr_mu0 = p.adjust(p_mu0, "fdr")) %>%
setNames(comps)
fdr_column <- ifelse(
"fdr_ihw" %in% colnames(mcols(db_results[[1]])),
"fdr_ihw",
str_subset("fdr", colnames(mcols(db_results[[1]])))
)
up_col <- function(x) {
if (is.na(x) | is.nan(x)) return("#ffffff")
rgb(
colorRamp(c("#ffffff", colours$direction[["up"]]))(x), maxColorValue = 255
)
}
down_col <- function(x) {
if (is.na(x) | is.nan(x)) return("#ffffff")
rgb(
colorRamp(c("#ffffff", colours$direction[["down"]]))(x), maxColorValue = 255
)
}
unch_col <- function(x) {
if (is.na(x) | is.nan(x)) return("#ffffff")
rgb(
colorRamp(c("#ffffff", colours$direction[["unchanged"]]))(x),
maxColorValue = 255
)
}
lfc_col <- function(x){
if (is.na(x) | is.nan(x)) return("#ffffff")
rgb(
colorRamp(c(colours$direction[["down"]], "#ffffff", colours$direction[["up"]]))(x),
maxColorValue = 255
)
}
expr_col <- function(x){
if (is.na(x) | is.nan(x)) return("#ffffff")
rgb(colorRamp(hcl.colors(9, "TealRose"))(x), maxColorValue = 255)
}
bar_style <- function(width = 1, fill = "#e6e6e6", height = "75%", align = c("left", "right"), color = NULL) {
align <- match.arg(align)
if (align == "left") {
position <- paste0(width * 100, "%")
image <- sprintf("linear-gradient(90deg, %1$s %2$s, transparent %2$s)", fill, position)
} else {
position <- paste0(100 - width * 100, "%")
image <- sprintf("linear-gradient(90deg, transparent %1$s, %2$s %1$s)", position, fill)
}
list(
backgroundImage = image,
backgroundSize = paste("100%", height),
backgroundRepeat = "no-repeat",
backgroundPosition = "center",
color = color
)
}
with_tooltip <- function(value, width = 30) {
htmltools::tags$span(title = value, str_trunc(value, width))
}
comp_name <- glue(
"{targets[[1]]}_{pairs[[1]][[1]]}_{pairs[[1]][[2]]}-",
"{targets[[2]]}_{pairs[[2]][[1]]}_{pairs[[2]][[2]]}"
)
fig_path <- here::here("docs", "assets", comp_name)
if (!dir.exists(fig_path)) dir.create(fig_path, recursive = TRUE)
fig_dev <- knitr::opts_chunk$get("dev")
fig_type <- fig_dev[[1]]
if (is.null(fig_type)) stop("Couldn't detect figure type")
fig_fun <- match.fun(fig_type)
if (fig_type %in% c("bmp", "jpeg", "png", "tiff")) {
## These figure types require dpi & resetting to be inches
formals(fig_fun)$units <- "in"
formals(fig_fun)$res <- 300
}
out_path <- here::here(
"output", "pairwise_comparisons", paste(targets, collapse = "_")
)
if (!dir.exists(out_path)) dir.create(out_path, recursive = TRUE)
all_out <- list(
csv = file.path(
out_path, glue(comp_name, "-pairwise_comparison.csv.gz"
)
),
de_genes = file.path(
out_path,
glue("{comp_name}-de_genes.csv")
),
goseq = file.path(
out_path,
glue("{comp_name}-enrichment.csv")
),
results = file.path(
out_path, glue(comp_name, "-all_windows.rds")
),
rna_enrichment = file.path(
out_path,
glue("{comp_name}-rnaseq_enrichment.csv")
),
renv = here::here(
"output", "envs",
glue(comp_name, "-pairwise_comparison.RData")
)
)
## Empty objects for when RNA-Seq is absent
de_genes_both_comps <- tibble()
cmn_res <- list(tibble(gs_name = character(), leadingEdge = list()))
This stage of the GRAVI workflow takes the results from two individual differential binding analyses and compares the results, by finding peaks which directly overlap and considering the joint behaviour. In this specific analyses the differential binding response for AR when comparing E2DHT Vs E2, will be integrated with the differential binding response for H3K27ac comparing E2DHT Vs E2. Data will first be described from the perspective of consensus peaks and oracle peaks, before moving onto the differentially bound windows obtained in previous steps.
The initial steps of differential binding analysis detect regions with clear changes in binding patterns, however the integration of datasets instead focusses on the correct classification of a given range/window. Differential binding analyses generally focus on controlling the Type II errors (i.e. false discoveries), which comes with a commensurate increase in Type I errors (false negatives). When seeking to correctly classify a window across two datasets, the detection as changed can be taken as a robust finding, whilst the consideration as unchanged may be less robust. When comparing a window across two datasets, a detection as changed within one dataset is taken as primary evidence of ‘something interesting’ occurring, with the secondary consideration being the correct description of the combined binding patterns. For this reason, and to minimise issues with hard thresholding, once a window is considered to be of interest, the p-value used in the second comparison is that obtained (and FDR-adjusted) using the test for \(H_0: \mu = 0\) instead of the initial \(H_0: 0 < |\mu| < \lambda\). The insistence on the more stringent initial inclusion criteria still protects against false discoveries, but this approach instead allows improved classification across both comparisons.
For classification steps within each window, each target is given the status 1) Up, 2) Down, 3) Unchanged, or 4) Undetected. Across two comparisons, this gives 15 possible classifications for each region found with at least one target bound (e.g. Up-Up, Up-Down, Up-Unchanged, Up-Undetected etc.), given that any potential Undetected-Undetected windows will not be included for obvious reasons.
After direct comparison of binding patterns, enrichment analysis is performed in a similar manner to for individual analyses. If RNA-Seq data is provided, genes will be restricted to those present within the RNA-Seq dataset, as representative of detected genes. Enrichment testing is performed on:
If RNA-Seq data is supplied, the sets of genes associated with all binding patterns (e.g. Up-Up, Up-Down etc) were treated as gene-sets and Gene Set Enrichment Analysis (GSEA) (Subramanian et al. 2005) was used to determine enrichment and leading-edge genes associated with altered expression. Genes will be ranked to incorporate direction of change (i.e. Directional GSEA) or significance only (i.e Non-Directional GSEA). The results from enrichment analysis across ChIP target and RNA-Seq data will also be combined to reveal pathways under the regulatory influence in either comparison, for which we also have evidence of changed gene expression.
Data was loaded for treatment-agnostic consensus peaks for AR and H3K27ac with treatment-specific oracle peaks (i.e. based on reproducibility across treatment-specific replicates). The sets of macs2-derived peaks for both AR and H3K27ac were first compared using treatment-agnostic peaks for each target. The sets of treatment-specific oracle-peaks were then compared across the combined treatment groups and targets.
fig_name <- targets %>%
paste(collapse = "_") %>%
paste0("_common_peaks.", fig_type)
lapply(
consensus_peaks,
function(x) as.character(subsetByOverlaps(all_consensus, x))
)%>%
venn.diagram(
file.path(fig_path, fig_name),
imagetype = fig_type,
units = "in",
cat.cex = 1.4,
height = 9,
width = 10,
fill = comp_cols[comps],
alpha = 0.3
)
file.remove(list.files(fig_path, pattern = "log$", full.names = TRUE))
Union of peaks found in AR and H3K27ac, and which of the two targets each peak is found in. Peaks are based on the union of both sets of consensus peaks, which are themselves independent of treatment group.
bar_cols <- treat_colours[rev(unlist(pairs))]
if (length(unique(targets)) == 1)
bar_cols <- treat_colours[
pairs %>%
unlist() %>%
unique() %>%
rev()
]
samples %>%
distinct(target, treat) %>%
mutate_all(as.character) %>%
unite(group, target, treat, remove = FALSE) %>%
split(.$group) %>%
lapply(
function(x) {
gr <- subsetByOverlaps(all_consensus, oracle_peaks[[x$target]][[x$treat]])
as.character(gr)
}
) %>%
setNames(
str_replace_all(names(.), "(.+)_(.+)", "\\1 (\\2)")
) %>%
fromList() %>%
upset(
sets = samples %>%
distinct(target, treat) %>%
mutate_all(as.character) %>%
mutate(grp = glue("{target} ({treat})")) %>%
pull("grp") %>%
as.character() %>%
rev(),
keep.order = TRUE,
order.by = "freq",
set_size.show = TRUE,
set_size.scale_max = oracle_peaks %>%
lapply(function(x) lapply(x, length)) %>%
unlist() %>%
max() %>%
multiply_by(1.15),
sets.bar.color = bar_cols
)
Macs2 peaks detected for AR and H3K27ac in the treatment groups E2 or E2DHT.
lambda <- log2(config$comparisons$fc)
c1_status <- glue("{comps[[1]]}_status")
c1_fdr <- glue("{comps[[1]]}_fdr")
c1_logfc <- glue("{comps[[1]]}_logFC")
c1_logcpm <- glue("{comps[[1]]}_AveExpr")
c1_centre <- glue("{comps[[1]]}_centre")
c2_status <- glue("{comps[[2]]}_status")
c2_fdr <- glue("{comps[[2]]}_fdr")
c2_logfc <- glue("{comps[[2]]}_logFC")
c2_logcpm <- glue("{comps[[2]]}_AveExpr")
c2_centre <- glue("{comps[[2]]}_centre")
all_windows <- db_results %>%
lapply(granges) %>%
GRangesList() %>%
unlist()%>%
sort() %>%
GenomicRanges::reduce() %>%
mutate(
region = regions[
bestOverlap(., GRangesList(lapply(gene_regions, granges)))
] %>%
factor(levels = regions)
) %>%
join_overlap_left(
db_results[[comps[[1]]]] %>%
mutate(
status = as.character(status),
peak_centre = start(
resize(keyval_range, width = 1, fix = "center")
)
) %>%
select(
!!sym(c1_logcpm) := AveExpr,
!!sym(c1_logfc) := logFC,
!!sym(c1_status) := status,
!!sym(c1_fdr) := !!sym(fdr_column),
!!sym(glue("{comps[[1]]}_fdr_mu0")) := fdr_mu0,
!!sym(glue("{comps[[1]]}_centre")) := peak_centre
)
) %>%
join_overlap_left(
db_results[[comps[[2]]]] %>%
mutate(
status = as.character(status),
peak_centre = start(
resize(keyval_range, width = 1, fix = "center")
)
) %>%
select(
!!sym(c2_logcpm) := AveExpr,
!!sym(c2_logfc) := logFC,
!!sym(c2_status) := status,
!!sym(c2_fdr) := !!sym(fdr_column),
!!sym(glue("{comps[[2]]}_fdr_mu0")) := fdr_mu0,
!!sym(glue("{comps[[2]]}_centre")) := peak_centre
)
) %>%
## Remove duplicate mappings to each range, just selecting the one with the
## lowest FDR
arrange(!!sym(c2_fdr)) %>%
distinctMC(!!sym(c1_fdr), .keep_all = TRUE) %>%
arrange(!!sym(c1_fdr)) %>%
sort()
## plyranges::mutate changes the column names using `make.names()`. Avoid!!
## Reclassify windows from the first comparison using data from the second
## to reduce false negatives. If the 2nd comparison contains a significant
## change in binding, reclassify the first using the fdr for H0: mu[0] = 0
## but only where |logFC[est]| > lambda
switch_up <- mcols(all_windows)[[c2_fdr]] < fdr_alpha &
mcols(all_windows)[[glue("{comps[[1]]}_fdr_mu0")]] < fdr_alpha &
mcols(all_windows)[[c1_logfc]] > lambda
mcols(all_windows)[[c1_status]][switch_up] <- "Up"
switch_down <- mcols(all_windows)[[c2_fdr]] < fdr_alpha &
mcols(all_windows)[[glue("{comps[[1]]}_fdr_mu0")]] < fdr_alpha &
mcols(all_windows)[[c1_logfc]] < (-1) * lambda
mcols(all_windows)[[c1_status]][switch_down] <- "Down"
mcols(all_windows)[[c1_status]] <- mcols(all_windows) %>%
.[[c1_status]] %>%
factor(levels = str_sep_to_title(names(colours$direction))) %>%
fct_explicit_na("Undetected") %>%
fct_relabel(function(x) paste(comps[[1]], x))
## Now reclassify the second comparison using changed windows in the first
switch_up <- mcols(all_windows)[[c1_fdr]] < fdr_alpha &
mcols(all_windows)[[glue("{comps[[2]]}_fdr_mu0")]] < fdr_alpha &
mcols(all_windows)[[c2_logfc]] > lambda
mcols(all_windows)[[c2_status]][switch_up] <- "Up"
switch_down <- mcols(all_windows)[[c1_fdr]] < fdr_alpha &
mcols(all_windows)[[glue("{comps[[2]]}_fdr_mu0")]] < fdr_alpha &
mcols(all_windows)[[c2_logfc]] < (-1) * lambda
mcols(all_windows)[[c2_status]][switch_down] <- "Down"
mcols(all_windows)[[c2_status]] <- mcols(all_windows) %>%
.[[c2_status]] %>%
factor(levels = str_sep_to_title(names(colours$direction))) %>%
fct_explicit_na("Undetected") %>%
fct_relabel(function(x) paste(comps[[2]], x))
mcols(all_windows)[["status"]] <- paste(
mcols(all_windows)[[c1_status]],
mcols(all_windows)[[c2_status]],
sep = " - "
) %>%
factor(levels = combs) %>%
droplevels()
mcols(all_windows)[["distance"]] <- abs(
mcols(all_windows)[[glue("{comps[[1]]}_centre")]] - mcols(all_windows)[[glue("{comps[[2]]}_centre")]]
)
## Remove any with an 'ambiguous' status
all_windows <- subset(all_windows, !is.na(status))
if (has_features)
all_windows$feature <- bestOverlap(
all_windows, external_features, var = "feature", missing = "no_feature"
) %>%
str_sep_to_title()
all_windows$hic <- NA
if (has_hic)
all_windows$hic <- overlapsAny(all_windows, hic)
## Define promoters for mapping
feat_prom <- feat_enh <- GRanges()
if (has_features) {
feat_prom <- ifelse(
"feature" %in% colnames(mcols(external_features)),
list(granges(subset(external_features, str_detect(feature, "[Pp]rom")))),
list(GRanges())
)[[1]]
feat_enh <- ifelse(
"feature" %in% colnames(mcols(external_features)),
list(granges(subset(external_features, str_detect(feature, "[Ee]nhanc")))),
list(GRanges())
)[[1]]
}
prom4mapping <- GRangesList(feat_prom, granges(gene_regions$promoters)) %>%
unlist() %>%
GenomicRanges::reduce()
all_windows <- all_windows %>%
select(any_of(c("region", "feature", "hic")), everything()) %>%
mapByFeature(
all_gr$gene, prom = prom4mapping, enh = feat_enh, gi = hic,
gr2gene = params$mapping$gr2gene,
prom2gene = params$mapping$prom2gene,
enh2gene = params$mapping$enh2gene, gi2gene = params$mapping$gi2gene
)
all_windows$detected <- all_windows$gene_name
if (has_rnaseq) all_windows$detected <- endoapply(
all_windows$detected, intersect, rnaseq$gene_name
)
n_mapped <- all_windows %>%
select(status, gene_id) %>%
as_tibble() %>%
unnest(everything()) %>%
distinct(status, gene_id) %>%
group_by(status) %>%
summarise(mapped = dplyr::n()) %>%
arrange(desc(mapped))
A set of 51,077 common windows was formed by obtaining the union of windows produced during the analysis of AR and H3K27ac individually. These common windows were formed using any overlap between target-specific windows. The window-to-gene mappings and the best overlap with genomic regions was also reformed.
In the original datasets, 17,043 windows were included for AR, whilst 41,089 windows were included for H3K27ac. The distance between each pair of windows was found by taking the centre of the original sliding window used for statistical analysis, and finding the difference. As windows were detected in an unstranded manner, all distance values were set to be positive. Taking the complete set of 51,077 windows, 6,888 (13.5%) were considered as being detected in both datasets. The median distance between windows found in both comparisons was 400bp. 19 peaks (0.3%) were found to directly overlap.
Merged windows were compared across both targets and in order to obtain the best classification for each window, the initial values of an FDR-adjusted p-value < 0.05 from either comparison were used to consider a window as showing changed binding in at least one comparison. A secondary step was then introduced for each window such that if one target was significant and the other target had 1) received an FDR-adjusted p-value < 0.05 using a point-based \(H_0\), and 2) had an estimated logFC beyond the range specified for the range-based null hypothesis (\(H_0: 0 < |\mu| < \lambda\) for \(\lambda = 0.26\)), these windows were then considered significant in the second comparison. This was to help reduce the number of windows incorrectly classified as unchanged whilst the alternative ChIP target was defined as changed.
Whilst summary tables and figures are presented below, the full set of windows and mapped genes were exported as output/pairwise_comparisons/AR_H3K27ac/AR_E2_E2DHT-H3K27ac_E2_E2DHT-pairwise_comparison.csv.gz
cp <- htmltools::tags$em(
glue(
"Summary of combined binding windows for ",
glue_collapse(comps, sep = " and "),
". Windows were classified as described above. ",
"The number of windows showing each combined pattern are given, along ",
"with the number of unique genes mapped to each set of combined windows, ",
"noting that some genes will be mapped to multiple sets of windows. ",
"The % of all genes with a window mapped from either target is given in ",
"the final column."
)
)
tbl <- all_windows %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
group_by(across(ends_with("_status"))) %>%
summarise(
windows = dplyr::n(),
mapped = length(unique(unlist(gene_id))),
.groups = "drop"
) %>%
mutate(
`% genes` = mapped / length(unique(unlist(all_windows$gene_id)))
) %>%
reactable(
filterable = TRUE,
showPageSizeOptions = TRUE,
columns = list2(
!!sym(c1_status) := colDef(
name = comps[[1]],
cell = function(value) {
value <- str_extract(value, "Up|Down|Unchanged|Undetected")
if (str_detect(value, "Up")) value <- paste(value, "\u21E7")
if (str_detect(value, "Down")) value <- paste(value, "\u21E9")
paste(targets[[1]], value)
},
style = function(value) {
colour <- case_when(
str_detect(value, "Up") ~ colours$direction[["up"]],
str_detect(value, "Down") ~ colours$direction[["down"]],
str_detect(value, "Unchanged") ~ colours$direction[["unchanged"]],
str_detect(value, "Undetected") ~ colours$direction[["undetected"]]
)
list(color = colour)
},
footer = htmltools::tags$b("Total")
),
!!sym(c2_status) := colDef(
name = comps[[2]],
cell = function(value) {
value <- str_extract(value, "Up|Down|Unchanged|Undetected")
if (str_detect(value, "Up")) value <- paste(value, "\u21E7")
if (str_detect(value, "Down")) value <- paste(value, "\u21E9")
paste(targets[[2]], value)
},
style = function(value) {
colour <- case_when(
str_detect(value, "Up") ~ colours$direction[["up"]],
str_detect(value, "Down") ~ colours$direction[["down"]],
str_detect(value, "Unchanged") ~ colours$direction[["unchanged"]],
str_detect(value, "Undetected") ~ colours$direction[["undetected"]]
)
list(color = colour)
}
),
windows = colDef(
name = "Number of Windows",
cell = function(value) comma(value, 1),
style = function(value) {
bar_style(width = 0.9*value / length(all_windows), fill = "#B3B3B3")
},
footer = htmltools::tags$b(comma(length(all_windows), 1))
),
mapped = colDef(
name = "Mapped Genes",
cell = function(value) comma(value, 1),
style = function(value) {
bar_style(width = 0.9*value /length(unique(unlist(all_windows$gene_id))), fill = "#B3B3B3")
},
footer = htmltools::tags$b(comma(length(unique(unlist(all_windows$gene_id))), 1))
),
`% genes` = colDef(
name = "% Of All Mapped Genes",
cell = function(x) percent(x, 0.1),
style = function(value) {
bar_style(width = 0.9*value, fill = "#B3B3B3")
}
)
)
)
div(class = "table",
div(class = "table-header",
div(class = "caption", cp),
tbl
)
)
df <- all_windows %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
dplyr::filter(!str_detect(status, "Undetected")) %>%
droplevels()
df %>%
ggplot(
aes(
!!sym(c1_logfc), !!sym(c2_logfc),
colour = status
)
) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0) +
geom_hline(
yintercept = c(1, -1) * lambda,
linetype = 2, colour = "grey"
) +
geom_vline(xintercept = 0) +
geom_vline(
xintercept = c(1, -1)* lambda,
linetype = 2, colour = "grey"
) +
geom_text_repel(
aes(label = lab),
data = . %>%
dplyr::filter(
!str_detect(status, "Unchanged.+Unchanged"),
vapply(detected, length, integer(1)) > 0
) %>%
mutate(
total_lfc = abs(!!sym(c1_logfc)) + abs(!!sym(c2_logfc))
) %>%
group_by(status) %>%
dplyr::filter(total_lfc == max(total_lfc)) %>%
mutate(
detected = vapply(detected, paste, character(1), collapse = "; "),
lab = paste(status, detected, sep = ": ") %>% str_wrap(25)
),
size = 4,
bg.r = 0.001, bg.color = "grey70",
show.legend = FALSE
) +
geom_text(
aes(x, y, label = lab),
data = . %>%
mutate(
x = 0.9*max((!!sym(c1_logfc))),
y = min(!!sym(c2_logfc))
) %>%
summarise(
x = unique(x),
y = unique(y),
n = dplyr::n(),
lab = paste("n =", comma(n, 1)),
.groups = "drop"
),
inherit.aes = FALSE
) +
scale_colour_manual(values = direction_colours) +
labs(
x = paste(comps[[1]], "logFC"),
y = paste(comps[[2]], "logFC"),
colour = "Status"
) +
theme(legend.position = "top")
Comparative changes in both AR and H3K27ac. The window with most extreme combined change in binding is shown with any mapped genes labelled. The range around zero used for range-based hypothesis testing is indicated with grey dashed lines.
df <- all_windows %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
dplyr::filter(!str_detect(status, "Undetected")) %>%
droplevels()
df %>%
ggplot(
aes(
!!sym(c1_logfc), !!sym(c2_logfc),
colour = status
)
) +
geom_point(alpha = 0.6) +
geom_hline(yintercept = 0) +
geom_hline(
yintercept = c(1, -1) * lambda,
linetype = 2, colour = "grey"
) +
geom_vline(xintercept = 0) +
geom_vline(
xintercept = c(1, -1) * lambda,
linetype = 2, colour = "grey"
) +
geom_text_repel(
aes(label = detected),
data = . %>%
dplyr::filter(
!str_detect(status, "Unchanged.+Unchanged"),
vapply(detected, length, integer(1)) > 0
) %>%
mutate(
total_lfc = abs(!!sym(c1_logfc)) + abs(!!sym(c2_logfc))
) %>%
group_by(region) %>%
dplyr::filter(total_lfc == max(total_lfc)) %>%
mutate(
detected = vapply(detected, paste, character(1), collapse = "; ") %>%
str_wrap(25)
),
size = 3, bg.color = "grey70", bg.r = 0.001,
show.legend = FALSE
) +
geom_text(
aes(x, y, label = lab),
data = . %>%
mutate(
x = 0.9*max(!!sym(c1_logfc)),
y = min(!!sym(c2_logfc))
) %>%
group_by(region) %>%
summarise(
x = unique(x),
y = unique(y),
n = dplyr::n(),
lab = paste("n =", comma(n, 1)),
.groups = "drop"
),
inherit.aes = FALSE
) +
facet_wrap(~region) +
scale_colour_manual(values = direction_colours) +
labs(
x = paste(comps[[1]], "logFC"),
y = paste(comps[[2]], "logFC"),
colour = "Status"
) +
theme(legend.position = "top")
Comparative changes in both AR and H3K27ac. The window with most extreme combined change in binding for each separate region is shown with any mapped genes labelled. The range around zero used for range-based hypothesis testing is indicated with grey dashed lines.
all_windows %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
dplyr::filter(
!!sym(c1_status) != paste(comps[[1]], "Undetected")
) %>%
mutate(
!!sym(c1_status) := fct_relabel(
!!sym(c1_status),
str_extract,
pattern = "Up|Down|Unchanged|Undetected"
),
!!sym(c2_status) := fct_relabel(
!!sym(c2_status),
str_extract,
pattern = "Up|Down|Unchanged|Undetected"
)
) %>%
droplevels() %>%
ggplot(
aes(
!!sym(c1_logcpm),
!!sym(c1_logfc),
colour = !!sym(c1_status),
fill = !!sym(c2_status)
)
) +
geom_point(alpha = 0.6) +
geom_xsideboxplot(
aes(y = !!sym(c2_status)),
orientation = "y", colour = "black"
) +
geom_ysideboxplot(
aes(x = !!sym(c2_status)),
orientation = "x", colour = "black"
) +
geom_ysideline(
aes(x, y),
data = tibble(x = seq(0, 5), y = 0),
inherit.aes = FALSE
) +
geom_hline(yintercept = 0) +
geom_hline(yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey30") +
scale_xsidey_discrete() +
scale_ysidex_discrete(guide = guide_axis(angle = 90)) +
scale_colour_manual(
values = colours$direction %>%
setNames(str_to_title(names(.))) %>%
.[!str_detect(names(.), "Undetected")]
) +
scale_fill_manual(
values = colours$direction %>%
setNames(str_to_title(names(.)))
) +
labs(
x = paste(comps[[1]], "logCPM"),
y = paste(comps[[1]], "logFC"),
colour = str_replace(comps[[1]], ": ", "\n"),
fill = str_replace_all(comps[[2]], ": ", "\n")
) +
theme(
ggside.panel.scale.y = .2,
ggside.panel.scale.x = .25,
axis.title.x = element_text(hjust = 0.4, vjust = 1),
axis.title.y = element_text(hjust = 0.4, vjust = 1)
)
Differential binding patterns of AR by H3K27ac status. AR values are shown as points coloured by their individual classification as Up, Down or Unchanged, with the distributions of these points shown as boxplots by H3K27ac status in the side panels.
all_windows %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
as_tibble() %>%
dplyr::filter(
!!sym(c2_status) != paste(comps[[2]], "Undetected")
) %>%
mutate(
!!sym(c1_status) := fct_relabel(
!!sym(c1_status),
str_extract,
pattern = "Up|Down|Unchanged|Undetected"
),
!!sym(c2_status) := fct_relabel(
!!sym(c2_status),
str_extract,
pattern = "Up|Down|Unchanged|Undetected"
)
) %>%
droplevels() %>%
ggplot(
aes(
!!sym(c2_logcpm),
!!sym(c2_logfc),
colour = !!sym(c2_status),
fill = !!sym(c1_status)
)
) +
geom_point(alpha = 0.6) +
geom_xsideboxplot(
aes(y = !!sym(c1_status)),
orientation = "y", colour = "black"
) +
geom_ysideboxplot(
aes(x = !!sym(c1_status)),
orientation = "x", colour = "black"
) +
geom_ysideline(
aes(x, y),
data = tibble(x = seq(0, 5), y = 0),
inherit.aes = FALSE
) +
geom_hline(yintercept = 0) +
geom_hline(yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey30") +
scale_xsidey_discrete() +
scale_ysidex_discrete(guide = guide_axis(angle = 90)) +
scale_colour_manual(
values = colours$direction %>%
setNames(str_to_title(names(.))) %>%
.[!str_detect(names(.), "Undetected")]
) +
scale_fill_manual(
values = colours$direction %>%
setNames(str_to_title(names(.)))
) +
labs(
x = paste(comps[[2]], "logCPM"),
y = paste(comps[[2]], "logFC"),
colour = str_replace(comps[[2]], ": ", "\n"),
fill = str_replace_all(comps[[1]], ": ", "\n")
) +
theme(
ggside.panel.scale.y = .2,
ggside.panel.scale.x = .25,
axis.title.x = element_text(hjust = 0.4, vjust = 1),
axis.title.y = element_text(hjust = 0.4, vjust = 1)
)
Differential binding patterns of H3K27ac by AR status. H3K27ac values are shown as points coloured by their individual classification as Up, Down or Unchanged, with the distributions of these shown as boxplots by AR status in the side panels.
all_windows$grp <- paste(
mcols(all_windows)[[c1_status]],
mcols(all_windows)[[c2_status]],
all_windows$region
)
df <- all_windows %>%
split(.$grp) %>%
lapply(
function(x) {
tibble(
"{c1_status}" := unique(mcols(x)[[c1_status]]),
"{c2_status}" := unique(mcols(x)[[c2_status]]),
region = unique(mcols(x)[["region"]]),
n = length(x)
)
}
) %>%
bind_rows() %>%
dplyr::filter(
str_detect(!!sym(c1_status), "Up|Down") | str_detect(!!sym(c2_status), "Up|Down")
) %>%
group_by(!!sym(c1_status), !!sym(c2_status)) %>%
mutate(total = sum(n)) %>%
ungroup() %>%
arrange(!!sym(c1_status), !!sym(c2_status), region) %>%
mutate(
x0 = as.integer(!!sym(c1_status)),
y0 = as.integer(!!sym(c2_status)),
r = total / sum(total),
r = 0.5* r / max(r),
!!sym(c1_status) := fct_relabel(!!sym(c1_status), str_extract, pattern = "Up|Down|Un.+"),
!!sym(c2_status) := fct_relabel(!!sym(c2_status), str_extract, pattern = "Up|Down|Un.+")
)
df %>%
ggplot() +
ggforce::stat_pie(
aes(x0 = x0, y0 = y0, r0 = 0, r = r, fill = region, amount = n)
) +
geom_label(
aes(x0, y0, label = comma(total)),
data = . %>%
distinct(x0, y0, total) %>%
dplyr::filter(total > 0.05 * sum(total)),
alpha = 0.8, size = 4
) +
coord_equal() +
scale_fill_manual(values = region_colours) +
scale_x_continuous(
breaks = seq_along(levels(df[[c1_status]])),
labels = levels(df[[c1_status]])
) +
scale_y_continuous(
breaks = seq_along(levels(df[[c2_status]])),
labels = levels(df[[c2_status]])
) +
labs(
x = paste(comps[[1]], "Status"),
y = paste(comps[[2]], "Status"),
fill = "Region"
) +
theme(
panel.grid = element_blank()
)
Distribution of binding patterns by genomic regions as defined in earlier sections. Windows are only shown if change was detected in one of the comparisons.
all_windows %>%
as_tibble() %>%
dplyr::filter(!is.na(distance)) %>%
arrange(status, distance) %>%
group_by(status) %>%
mutate(
n = dplyr::n(),
q = seq_along(status) / n
) %>%
dplyr::filter(distance < 1e3, !str_detect(status, "Unchanged.+Unchanged")) %>%
ggplot(aes(distance, q, colour = status)) +
geom_line(linewidth = 1) +
scale_x_continuous(breaks = seq(0, 10, by = 2) * 100) +
scale_y_continuous(
labels = percent, expand = expansion(c(0, 0.05)),
breaks = seq(0, 1, by = 0.2)
) +
scale_colour_manual(
values = direction_colours
) +
labs(
x = "Distance Between Window Centres (bp)",
y = "Percentile",
colour = "Combined Status"
)
Distances between windows where maximal signal was detected for each target. Windows are only shown if change was detected in one or more comparisons. The x-axis is truncated at 1kb
all_windows %>%
as_tibble() %>%
dplyr::filter(!is.na(distance)) %>%
arrange(region, distance) %>%
group_by(region) %>%
mutate(
n = dplyr::n(),
q = seq_along(region) / n
) %>%
dplyr::filter(distance < 1e3) %>%
ggplot(aes(distance, q, colour = region)) +
geom_line(linewidth = 1) +
scale_x_continuous(breaks = seq(0, 10, by = 2) * 100) +
scale_y_continuous(
labels = percent, expand = expansion(c(0, 0.05)),
breaks = seq(0, 1, by = 0.2)
) +
scale_colour_manual(values = region_colours) +
labs(
x = "Distance Between Window Centres (bp)",
y = "Percentile",
colour = "Genomic Region"
)
Distribution of distances between peaks separated by genomic region. All sites are included regardless of changed binding. The x-axis is truncated 1kb.
A series of profile heatmaps were generated for any set of pairwise comparisons with more than 25 ranges, and where binding was detected as changed in at least one of the comparisons. If comparing two targets, peaks were centred at the mid-point between the ranges of individual maximal signal. If only one target is detected within a range, peaks are centred at the range with maximal signal. Scales for fill and average signal (both logCPM) are held constant across all plots for easier comparison between groups.
## Set the BWFL as a single object incorporating target & treatment names
fl <- seq_along(pairs) %>%
lapply(
function(x) {
file.path(
macs2_path[[x]], glue("{pairs[[x]]}_merged_treat_pileup.bw")
) %>%
setNames(paste(names(pairs)[[x]], pairs[[x]]))
}
)%>%
unlist() %>%
.[!duplicated(.)]
bwfl <- setNames(BigWigFileList(fl), names(fl))
grl_to_plot <- all_windows %>%
split(.$status) %>%
.[str_detect(names(.),"Up|Down")] %>%
.[vapply(., length, integer(1)) > 25] %>% # Only show groups with > 10 ranges
lapply(
mutate,
centre = rowMeans(cbind(!!sym(c1_centre), !!sym(c2_centre)), na.rm = TRUE),
rng = paste0(seqnames, ":", as.integer(centre))
) %>%
lapply(function(x) GRanges(x$rng, seqinfo = sq)) %>%
GRangesList()
profile_width <- 5e3
x_lab <- c(
seq(0, floor(0.5 * profile_width / 1e3), length.out = 3),
seq(-floor(0.5 * profile_width / 1e3), 0, length.out = 3)
) %>%
sort() %>%
unique()
n_bins <- 100
sig_profiles <- mclapply(
grl_to_plot,
function(x) getProfileData(
bwfl, x, upstream = profile_width / 2, bins = n_bins
),
mc.cores = min(length(grl_to_plot), threads)
)
profile_heatmaps <- sig_profiles %>%
mclapply(
plotProfileHeatmap,
profileCol = "profile_data",
xLab = "Distance from Centre (bp)",
fillLab = "logCPM",
colour = "name",
mc.cores = min(length(grl_to_plot), threads)
)
fill_range <- profile_heatmaps %>%
lapply(function(x) x$data[,"score"]) %>%
unlist() %>%
c(0) %>%
range()
sidey_range <- profile_heatmaps %>%
lapply(function(x) x$layers[[3]]$data$y) %>%
unlist() %>%
range()
profile_heatmaps <- profile_heatmaps %>%
lapply(
function(x) {
x +
scale_x_continuous(
breaks = x_lab * 1e3, labels = x_lab, expand = expansion(0)
) +
scale_xsidey_continuous(limits = sidey_range) +
scale_fill_gradientn(colours = colours$heatmaps, limits = fill_range) +
scale_colour_manual(
values = lapply(
seq_along(pairs),
function(i) setNames(
treat_colours[pairs[[i]]],
paste(names(pairs)[[i]], pairs[[i]])
)
) %>%
unlist() %>%
.[!duplicated(names(.))]
) +
labs(
x = "Distance from Centre (kb)",
fill = "logCPM", linetype = "Consensus\nPeak\nOverlap",
colour = "Sample"
)
}
)
htmltools::tagList(
mclapply(
seq_along(profile_heatmaps),
function(x) {
## Export the image
pw <- names(profile_heatmaps)[[x]]
img_out <- file.path(
fig_path,
pw %>%
str_remove_all("(:|Vs. |- )") %>%
str_replace_all(" ", "_") %>%
str_replace_all("_-_", "-") %>%
paste0("_profile_heatmap.", fig_type)
)
n_ranges <- length(unique(profile_heatmaps[[x]]$data$range))
n_samples<- length(unique(profile_heatmaps[[x]]$data$name))
fig_fun(
filename = img_out,
width = knitr::opts_current$get("fig.width") * n_samples / 4,
height = min(
3.5 + knitr::opts_current$get("fig.height") * n_ranges / 1.5e3,
10
)
)
print(profile_heatmaps[[x]])
dev.off()
## Create html tags
fig_link <- str_extract(img_out, "assets.+")
cp <- htmltools::tags$em(
glue(
"
All {n_ranges} ranges showing the pattern of {pw}. Ranges are centered
at the midpoint between ranges identified as the maximal signal range.
"
)
)
htmltools::div(
htmltools::div(
id = img_out %>%
basename() %>%
str_remove_all(glue(".{fig_type}$")) %>%
str_to_lower() %>%
str_replace_all("_", "-"),
class="section level3",
htmltools::h3(pw),
htmltools::div(
class = "figure", style = "text-align: center",
htmltools::img(src = fig_link, width = "100%"),
htmltools::tags$caption(cp)
)
)
)
},
mc.cores = threads
)
)
For the initial set of visualisations, the sets of windows with the strongest signal were selected from within each group. Groups were restricted to those where differential binding was seen in at least comparison.
grl_to_plot <- all_windows %>%
filter(
!str_detect(status, "Un.+Un"),
vapply(detected, length, integer(1)) > 0
) %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
nest(
AveExpr = ends_with("AveExpr")
) %>%
mutate(
max_signal = vapply(AveExpr, function(x) max(unlist(x)), numeric(1))
) %>%
group_by(status) %>%
filter(max_signal == max(max_signal)) %>%
dplyr::select(
range, status, ends_with("logFC")
) %>%
droplevels() %>%
split(.$status) %>%
lapply(pull, "range") %>%
lapply(function(x) x[1]) %>%
lapply(GRanges, seqinfo = sq) %>%
GRangesList()
## The coverage
bwfl <- seq_along(pairs) %>%
lapply(
function(x) {
file.path(
macs2_path[[x]], glue("{pairs[[x]]}_merged_treat_pileup.bw")
) %>%
BigWigFileList() %>%
setNames(pairs[[x]])
}
) %>%
setNames(comps)
line_col <- lapply(bwfl, function(x) treat_colours[names(x)])
## Coverage annotations
annot <- comps %>%
lapply(
function(x) {
col <- glue("{x}_status")
select(all_windows, all_of(col)) %>%
mutate(
status = str_extract(!!sym(col), "Down|Up|Unchanged|Undetected")
) %>%
splitAsList(.$status) %>%
lapply(granges) %>%
lapply(unique) %>%
GRangesList()
}
) %>%
setNames(comps)
annot_col <- unlist(colours$direction) %>%
setNames(str_to_title(names(.)))
## Coverage y-limits
gr <- unlist(grl_to_plot)
y_lim <- lapply(
bwfl,
function(x) {
cov <- lapply(x, import.bw, which = gr)
unlist(lapply(cov, function(y) max(y$score))) %>%
c(0) %>%
range()
}
)
## The features track
feat_gr <- gene_regions %>%
lapply(granges) %>%
GRangesList()
feature_colours <- colours$regions
if (has_features) {
feat_gr <- list(Regions = feat_gr)
feat_gr$Features <- splitAsList(external_features, external_features$feature)
feature_colours <- list(
Regions = unlist(colours$regions),
Features = unlist(colours$features)
)
}
## The genes track
hfgc_genes <- trans_models
gene_col <- "grey"
if (has_rnaseq) {
rna_lfc_col <- colnames(rnaseq)[str_detect(str_to_lower(colnames(rnaseq)), "logfc")][1]
rna_fdr_col <- colnames(rnaseq)[str_detect(str_to_lower(colnames(rnaseq)), "fdr|adjp")][1]
if (!is.na(rna_lfc_col) & !is.na(rna_fdr_col)) {
hfgc_genes <- trans_models %>%
mutate(
status = case_when(
!gene %in% rnaseq$gene_id ~ "Undetected",
gene %in% dplyr::filter(
rnaseq, !!sym(rna_lfc_col) > 0, !!sym(rna_fdr_col) < fdr_alpha
)$gene_id ~ "Up",
gene %in% dplyr::filter(
rnaseq, !!sym(rna_lfc_col) < 0, !!sym(rna_fdr_col) < fdr_alpha
)$gene_id ~ "Down",
gene %in% dplyr::filter(
rnaseq, !!sym(rna_fdr_col) >= fdr_alpha
)$gene_id ~ "Unchanged",
)
) %>%
splitAsList(.$status) %>%
lapply(select, -status) %>%
GRangesList()
gene_col <- colours$direction %>%
setNames(str_to_title(names(.)))
}
}
## External Coverage (Optional)
if (!is.null(config$external$coverage)) {
ext_cov_path <- config$external$coverage %>%
lapply(unlist) %>%
lapply(function(x) setNames(here::here(x), names(x)))
bwfl <- c(
bwfl[comps],
lapply(ext_cov_path, function(x) BigWigFileList(x) %>% setNames(names(x)))
)
line_col <- c(
line_col[comps],
ext_cov_path %>%
lapply(
function(x) {
missing <- setdiff(names(x), names(treat_colours))
cmn <- intersect(names(x), names(treat_colours))
col <- setNames(character(length(names(x))), names(x))
if (length(cmn) > 0) col[cmn] <- treat_colours[cmn]
if (length(missing) > 0)
col[missing] <- hcl.colors(
max(5, length(missing)), "Zissou 1")[seq_along(missing)]
col
}
)
)
y_ranges <- grl_to_plot %>%
unlist() %>%
granges() %>%
resize(w = 10 * width(.), fix = 'center')
y_lim <- c(
y_lim[comps],
bwfl[names(config$external$coverage)] %>%
lapply(
function(x) {
GRangesList(lapply(x, import.bw, which = y_ranges)) %>%
unlist() %>%
filter(score == max(score)) %>%
mcols() %>%
.[["score"]] %>%
c(0) %>%
range()
}
)
)
}
htmltools::tagList(
mclapply(
seq_along(grl_to_plot),
function(x) {
## Export the image
img_out <- file.path(
fig_path,
names(grl_to_plot)[[x]] %>%
str_remove_all("(:|Vs. |- )") %>%
str_replace_all(" ", "_") %>%
paste0("_AveExpr.", fig_type)
)
fig_fun(
filename = img_out, width = 10, height = 8
)
ct <- FALSE
if (length(subsetByOverlaps(all_gr$transcript, grl_to_plot[[x]])) > 10)
ct <- "meta"
plotHFGC(
grl_to_plot[[x]],
features = feat_gr, featcol = feature_colours,
featsize = 1 + has_features,
genes = hfgc_genes, genecol = gene_col,
coverage = bwfl, linecol = line_col,
annotation = annot, annotcol = annot_col,
cytobands = bands_df,
rotation.title = 90,
zoom = 10,
ylim = y_lim,
collapseTranscripts = ct,
col.title = "black", background.title = "white", showAxis = FALSE
)
dev.off()
## Create html tags
fig_link <- str_extract(img_out, "assets.+")
gn <- unlist(subsetByOverlaps(all_windows, grl_to_plot[[x]])$detected)
cp <- htmltools::tags$em(
glue(
"
Highlighted region corresponds to the highest signal within the group
{names(grl_to_plot)[[x]]}. The most likely target
{ifelse(length(gn) == 1, 'gene is ', 'genes are ')}
{collapseGenes(gn, format = '')}
"
)
)
htmltools::div(
htmltools::div(
id = names(grl_to_plot)[[x]] %>%
str_remove_all("(:|Vs. |- )") %>%
str_replace_all(" ", "-") %>%
str_to_lower() %>%
paste0("-aveexpr"),
class="section level3",
htmltools::h3(names(grl_to_plot)[[x]]),
htmltools::div(
class = "figure", style = "text-align: center",
htmltools::img(src = fig_link, width = 960),
htmltools::p(
class = "caption", htmltools::tags$em(cp)
)
)
)
)
},
mc.cores = threads
)
)
Highlighted region corresponds to the highest signal within the group AR Up - H3K27ac Up. The most likely target gene is EIF4BP8
Highlighted region corresponds to the highest signal within the group AR Up - H3K27ac Down. The most likely target genes are FLOT1, HCG20, IER3, IER3-AS1 and RN7SL353P
Highlighted region corresponds to the highest signal within the group AR Up - H3K27ac Unchanged. The most likely target genes are AC092171.1, AC092171.3, AC092171.4, AC092171.5, AC093620.1 and TNRC18
Highlighted region corresponds to the highest signal within the group AR Unchanged - H3K27ac Up. The most likely target genes are RARB and TOP2B
Highlighted region corresponds to the highest signal within the group AR Unchanged - H3K27ac Down. The most likely target gene is WWOX
For the initial set of visualisations, the sets of windows with the strongest signal were selected from within each group. Groups were restricted to those where differential binding was seen in at least comparison.
grl_to_plot <- all_windows %>%
filter(
!str_detect(status, "Un.+Un"),
vapply(detected, length, integer(1)) > 0
) %>%
as_tibble() %>%
rename_all(
str_replace_all, pattern = "\\.Vs\\.\\.", replacement = " Vs. "
) %>%
rename_all(
str_replace_all, pattern = "\\.\\.", replacement = ": "
) %>%
nest(
logFC = ends_with("logFC")
) %>%
mutate(
max_logFC = vapply(logFC, function(x) max(abs(unlist(x))), numeric(1))
) %>%
group_by(status) %>%
filter(max_logFC == max(max_logFC)) %>%
dplyr::select(
range, status, ends_with("logFC")
) %>%
droplevels() %>%
split(.$status) %>%
lapply(pull, "range") %>%
lapply(GRanges, seqinfo = sq) %>%
GRangesList()
## Coverage y-limits
gr <- unlist(grl_to_plot)
y_lim <- lapply(
bwfl,
function(x) {
cov <- lapply(x, import.bw, which = gr)
unlist(lapply(cov, function(y) max(y$score))) %>%
c(0) %>%
range()
}
)
## External Coverage (Optional)
if (!is.null(config$external$coverage)) {
y_ranges <- grl_to_plot %>%
unlist() %>%
granges() %>%
resize(w = 10 * width(.), fix = 'center')
y_lim <- c(
y_lim[comps],
bwfl[names(config$external$coverage)] %>%
lapply(
function(x) {
GRangesList(lapply(x, import.bw, which = y_ranges)) %>%
unlist() %>%
filter(score == max(score)) %>%
mcols() %>%
.[["score"]] %>%
c(0) %>%
range()
}
)
)
}
htmltools::tagList(
mclapply(
seq_along(grl_to_plot),
function(x) {
## Export the image
img_out <- file.path(
fig_path,
names(grl_to_plot)[[x]] %>%
str_remove_all("(:|Vs. |- )") %>%
str_replace_all(" ", "_") %>%
paste0("_logFC.", fig_type)
)
fig_fun(
filename = img_out, width = 10, height = 8
)
ct <- FALSE
if (length(subsetByOverlaps(all_gr$transcript, grl_to_plot[[x]])) > 10)
ct <- "meta"
plotHFGC(
grl_to_plot[[x]],
features = feat_gr, featcol = feature_colours,
featsize = 1 + has_features,
genes = hfgc_genes, genecol = gene_col,
coverage = bwfl, linecol = line_col,
annotation = annot, annotcol = annot_col,
cytobands = bands_df,
rotation.title = 90,
zoom = 10,
ylim = y_lim,
collapseTranscripts = ct,
col.title = "black", background.title = "white", showAxis = FALSE
)
dev.off()
## Create html tags
fig_link <- str_extract(img_out, "assets.+")
gn <- unlist(subsetByOverlaps(all_windows, grl_to_plot[[x]])$detected)
cp <- htmltools::tags$em(
glue(
"
Highlighted region corresponds to the largest change within the group
{names(grl_to_plot)[[x]]}. The most likely target
{ifelse(length(gn) == 1, 'gene is ', 'genes are ')}
{collapseGenes(gn, format = '')}
"
)
)
htmltools::div(
htmltools::div(
id = names(grl_to_plot)[[x]] %>%
str_remove_all("(:|Vs. |- )") %>%
str_replace_all(" ", "-") %>%
str_to_lower() %>%
paste0("-logfc"),
class="section level3",
htmltools::h3(names(grl_to_plot)[[x]]),
htmltools::div(
class = "figure", style = "text-align: center",
htmltools::img(src = fig_link, width = 960),
htmltools::p(
class = "caption", htmltools::tags$em(cp)
)
)
)
)
},
mc.cores = threads
)
)
Highlighted region corresponds to the largest change within the group AR Up - H3K27ac Up. The most likely target gene is AC078864.2
Highlighted region corresponds to the largest change within the group AR Up - H3K27ac Down. The most likely target gene is PTGER3
Highlighted region corresponds to the largest change within the group AR Up - H3K27ac Unchanged. The most likely target gene is RN7SL131P
Highlighted region corresponds to the largest change within the group AR Unchanged - H3K27ac Up. The most likely target gene is GOLPH3
Highlighted region corresponds to the largest change within the group AR Unchanged - H3K27ac Down. The most likely target gene is WWOX
min_gs_size <- params$enrichment$min_size
if (is.null(min_gs_size) | is.na(min_gs_size)) min_gs_size <- 0
max_gs_size <- params$enrichment$max_size
if (is.null(max_gs_size) | is.na(max_gs_size)) max_gs_size <- Inf
min_sig <- params$enrichment$min_sig
if (is.null(min_sig) | is.na(min_sig)) min_sig <- 1
all_ids <- genes_gr %>%
mutate(w = width) %>%
as_tibble() %>%
dplyr::select(gene_id, w) %>%
arrange(desc(w)) %>%
distinct(gene_id, w) %>%
arrange(gene_id) %>%
left_join(
all_windows$gene_id %>%
unlist() %>%
table() %>%
enframe(name = "gene_id", value = "n_windows"),
by = "gene_id"
) %>%
dplyr::filter(gene_id %in% genes_gr$gene_id) %>%
mutate(
n_windows = as.integer(ifelse(is.na(n_windows), 0, n_windows))
)
msigdb <- msigdbr(species = "Homo sapiens") %>%
dplyr::filter(
gs_cat %in% unlist(params$enrichment$msigdb$gs_cat) |
gs_subcat %in% unlist(params$enrichment$msigdb$gs_subcat),
str_detect(ensembl_gene, "^E")
) %>%
dplyr::rename(gene_id = ensembl_gene, gene_name = gene_symbol) %>% # For easier integration
dplyr::select(-starts_with("human"), -contains("entrez")) %>%
dplyr::filter(gene_id %in% all_ids$gene_id) %>%
group_by(gs_name) %>%
mutate(n = dplyr::n()) %>%
ungroup() %>%
dplyr::filter(n >= min_gs_size, n <= max_gs_size)
gs_by_gsid <- msigdb %>%
split(.$gs_name) %>%
mclapply(pull, "gene_id", mc.cores = threads) %>%
mclapply(unique, mc.cores = threads)
gs_by_geneid <- msigdb %>%
split(.$gene_id) %>%
mclapply(pull, "gs_name", mc.cores = threads) %>%
mclapply(unique, mc.cores = threads)
gs_url <- msigdb %>%
distinct(gs_name, gs_url) %>%
mutate(
gs_url = ifelse(
gs_url == "", "
http://www.gsea-msigdb.org/gsea/msigdb/collections.jsp",
gs_url
)
) %>%
with(setNames(gs_url, gs_name))
min_network_size <- params$networks$min_size
if (is.null(min_network_size) | is.na(min_network_size))
min_network_size <- 2
max_network_size <- params$networks$max_size
if (is.null(max_network_size) | is.na(max_network_size))
max_network_size <- Inf
max_network_dist <- params$networks$max_distance
if (is.null(max_network_dist) | is.na(max_network_dist))
max_network_dist <- 1
net_layout <- params$networks$layout
enrich_alpha <- params$enrichment$alpha
adj_method <- match.arg(params$enrichment$adj, p.adjust.methods)
adj_desc <- case_when(
p.adjust.methods %in% c("fdr", "BH") ~ "the Benjamini-Hochberg FDR",
p.adjust.methods %in% c("BY") ~ "the Benjamini-Yekutieli FDR",
p.adjust.methods %in% c("bonferroni") ~ "the Bonferroni",
p.adjust.methods %in% c("holm") ~ "Holm's",
p.adjust.methods %in% c("hommel") ~ "Hommel's",
p.adjust.methods %in% c("hochberg") ~ "Hochberg's",
p.adjust.methods %in% c("none") ~ "no"
) %>%
setNames(p.adjust.methods)
comp_ids <- comps %>%
lapply(
function(x) {
subset(all_windows, !str_detect(status, paste(x, "Undetected")))$gene_id
}
) %>%
lapply(unlist) %>%
lapply(unique) %>%
lapply(intersect, genes_gr$gene_id) %>%
setNames(comps)
mapped_ids <- comp_ids %>%
unlist() %>%
unique()
gene_lengths <- structure(width(all_gr$gene), names = all_gr$gene$gene_id)[mapped_ids]
group_ids <- all_windows %>%
split(f = .$status) %>%
mclapply(
function(x) unique(unlist(x$gene_id)),
mc.cores = threads
)
The first level of enrichment testing performed was to simply check the genes mapped to a binding site in either comparison, with no consideration being paid to the dynamics of binding in either comparison. This will capture and describe the overall biological activity being jointly regulated.
The genes in each section and intersection in the following Venn
Diagram will then be analysed against the complete set of 26,098 genes
mapped to either AR or H3K27ac. Firstly, all 26,098 genes mapped to a
binding site were tested for enrichment in comparison to the set of
62,400 annotated genes. For this analysis, gene width was used as a
potential source of sampling bias and Wallenius’ Non-Central
Hypergeomteric was used as implemented in goseq (Young et al. 2010).
The sets of genes mapped to each section of the Venn Diagram were then tested for enrichment in comparison to the larger set of 26,098 mapped genes. The standard hypergeometric distribution was used for these analyses.
If 4 or more gene-sets were considered to be enriched, a network plot was produced using the same methodology as for results from differential binding for individual comparisons.
venn_params <- list(
area1 = length(comp_ids[[1]]),
area2 = length(comp_ids[[2]]),
cross.area = sum(duplicated(unlist(comp_ids))),
category = comps,
fill = comp_cols[comps],
cex = 1.2, cat.cex = 1.2
)
grid.newpage()
do.call("draw.pairwise.venn", venn_params)
Venn Diagram showing genes mapped to a binding region associated with AR or H3K27ac. Of the 62,400 genes under consideration, a total of 26,098 genes were mapped to a binding region across both comparisons, leaving 36,302 genes unmapped to a binding region across either comparison.
pwf_mapped <- all_gr$gene %>%
mutate(w = width) %>%
as_tibble() %>%
arrange(gene_id, desc(w)) %>%
distinct(gene_id, w) %>%
mutate(mapped = gene_id %in% mapped_ids) %>%
with(nullp(setNames(mapped, gene_id), bias.data = log10(w)))
goseq_mapped_res <- pwf_mapped %>%
goseq(gene2cat = gs_by_geneid) %>%
dplyr::filter(numDEInCat > 0) %>%
as_tibble() %>%
dplyr::select(
gs_name = category, pval = over_represented_pvalue, starts_with("num")
) %>%
mutate(
adj_p = p.adjust(pval, adj_method),
enriched = adj_p < enrich_alpha
) %>%
dplyr::filter(numDEInCat >= min_sig) %>%
left_join(
dplyr::filter(msigdb, gene_id %in% mapped_ids)[c("gs_name", "gene_name", "gs_url", "gs_description")],
by = "gs_name"
) %>%
chop(gene_name) %>%
mutate(
gene_name = vapply(gene_name, paste, character(1), collapse = "; ")
)
any_goseq_mapped <- sum(goseq_mapped_res$adj_p < enrich_alpha) > 0
tg_mapped <- make_tbl_graph(
goseq_mapped_res,
gs = gs_by_gsid %>%
lapply(intersect, rownames(subset(pwf_mapped, DEgenes))) %>%
.[vapply(., length, integer(1)) > 0]
)
plot_network_mapped <- length(tg_mapped) >= min_network_size
tbl <- goseq_mapped_res %>%
dplyr::filter(enriched) %>%
dplyr::select(
gs_name, gs_description, pval, adj_p, starts_with("num"), gene_name
) %>%
reactable(
filterable = TRUE,
showPageSizeOptions = TRUE,
columns = list2(
gs_name = colDef(
name = "Gene Set",
cell = function(value) htmltools::tags$a(
href = gs_url[[value]],
target = "_blank",
str_replace_all(value, "_", " ")
),
html = TRUE,
maxWidth = 150
),
gs_description = colDef(
name = "Description",
cell = function(value) str_trunc(value, width = 150)
),
pval = colDef(
name = "P-Value",
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
adj_p = colDef(
name = "p<sub>adj</sub>", html = TRUE,
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
numDEInCat = colDef(
name = "Mapped Genes",
cell = function(value) comma(value, 1),
maxWidth = 80
),
numInCat = colDef(
name = "Genes in Gene-Set",
cell = function(value) comma(value, 1),
maxWidth = 80
),
gene_name = colDef(
name = "Gene Names",
cell = function(value) with_tooltip(value, width = 150)
)
)
)
cp <- glue(
"
All {comma(length(mapped_ids))} genes with a mapped binding window from
either comparison were compared to the complete set of
{comma(length(unique(all_gr$gene$gene_id)))} annotated genes, in order to
ascertain which key gene-sets and pathways were likely to be targeted in either analysis.
",
ifelse(
has_rnaseq,
"Genes were restricted to those in the set of detected genes from the provided RNA-Seq dataset. ",
""
),
"
Gene-length was included as the source of any potential sampling bias for a
gene being mapped to any target-bound range. This analysis provides an overall
viewpoint as to which pathways are regulated across either dataset.
"
)
div(class = "table",
div(class = "table-header", div(class = "caption", cp)),
tbl
)
pwf_t1t2 <- nullp(
vec_t1t2,
bias.data = log10(setNames(all_ids$w, all_ids$gene_id)[mapped_ids]),
plot.fit = TRUE
)
if (sum(pwf_t1t2$DEgenes) > 0) {
goseq_t1t2_res <- pwf_t1t2 %>%
goseq(gene2cat = gs_by_geneid, method = gs_method) %>%
dplyr::filter(numDEInCat > 0) %>%
as_tibble() %>%
dplyr::select(
gs_name = category, pval = over_represented_pvalue, starts_with("num")
) %>%
mutate(
adj_p = p.adjust(pval, adj_method),
enriched = adj_p < enrich_alpha
) %>%
dplyr::filter(numDEInCat >= min_sig) %>%
left_join(
dplyr::filter(msigdb, gene_id %in% mapped_ids)[c("gs_name", "gene_name", "gs_url", "gs_description")],
by = "gs_name"
) %>%
chop(gene_name) %>%
mutate(gene_name = vapply(gene_name, paste, character(1), collapse = "; "))
}
any_goseq_t1t2 <- sum(goseq_t1t2_res$adj_p < enrich_alpha) > 0
tg_t1t2 <- make_tbl_graph(
goseq_t1t2_res,
gs = gs_by_gsid %>%
lapply(intersect, rownames(subset(pwf_t1t2, DEgenes))) %>%
.[vapply(., length, integer(1)) > 0]
)
plot_network_t1t2 <- length(tg_t1t2) >= min_network_size
if (!any_goseq_t1t2) para <- glue(para, " No enrichment was found.")
tbl <- goseq_t1t2_res %>%
dplyr::filter(enriched) %>%
dplyr::select(
gs_name, gs_description, pval, adj_p, starts_with("num"), gene_name
) %>%
reactable(
filterable = TRUE,
showPageSizeOptions = TRUE,
columns = list2(
gs_name = colDef(
name = "Gene Set",
cell = function(value) htmltools::tags$a(
href = gs_url[[value]],
target = "_blank",
str_replace_all(value, "_", " ")
),
html = TRUE,
maxWidth = 150
),
gs_description = colDef(
name = "Description",
cell = function(value) str_trunc(value, width = 150)
),
pval = colDef(
name = "P-Value",
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
adj_p = colDef(
name = "p<sub>adj</sub>", html = TRUE,
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
numDEInCat = colDef(
name = "Genes Mapped to Both",
cell = function(value) comma(value, 1),
maxWidth = 80
),
numInCat = colDef(
name = "Genes Mapped to Either",
cell = function(value) comma(value, 1),
maxWidth = 80
),
gene_name = colDef(
name = "Gene Names",
cell = function(value) with_tooltip(value, width = 150)
)
)
)
div(class = "table",
div(class = "table-header", div(class = "caption", para)),
tbl
)
goseq_t1only_res <- tibble(
gs_name = character(), mapped = numeric(), enriched = logical(),
adj_p = numeric()
)
vec_t1only <- structure(
mapped_ids %in% setdiff(comp_ids[[1]], comp_ids[[2]]),
names = mapped_ids
)
gs_method <- "Hypergeometric"
any_goseq_t1only <- plot_network_t1only <- FALSE
para <- glue(
"
All {comma(length(setdiff(comp_ids[[1]], comp_ids[[2]])))} genes with a
mapped binding window to only {comps[[1]]} were compared to
the {comma(length(mapped_ids))} genes mapped to either
{glue_collapse(comps, last = ' or ')}, in order to ascertain whether any gene-sets
were likely to be targeted exclusively. No sampling bias offset was
incorporated, instead using a simple Hypergeometric model for enrichment testing.
"
)
pwf_t1only <- nullp(
vec_t1only,
bias.data = log10(setNames(all_ids$w, all_ids$gene_id)[mapped_ids]),
plot.fit = TRUE
)
if (sum(pwf_t1only$DEgenes) > 0) {
goseq_t1only_res <- pwf_t1only %>%
goseq(gene2cat = gs_by_geneid, method = gs_method) %>%
dplyr::filter(numDEInCat > 0) %>%
as_tibble() %>%
dplyr::select(
gs_name = category, pval = over_represented_pvalue, starts_with("num")
) %>%
mutate(
adj_p = p.adjust(pval, adj_method),
enriched = adj_p < enrich_alpha
) %>%
dplyr::filter(numDEInCat >= min_sig) %>%
left_join(
dplyr::filter(msigdb, gene_id %in% mapped_ids)[c("gs_name", "gene_name", "gs_url", "gs_description")],
by = "gs_name"
) %>%
chop(gene_name) %>%
mutate(gene_name = vapply(gene_name, paste, character(1), collapse = "; "))
}
any_goseq_t1only <- sum(goseq_t1only_res$adj_p < enrich_alpha) > 0
tg_t1only <- make_tbl_graph(
goseq_t1only_res,
gs = gs_by_gsid %>%
lapply(intersect, rownames(subset(pwf_t1only, DEgenes))) %>%
.[vapply(., length, integer(1)) > 0]
)
plot_network_t1only <- length(tg_t1only) >= min_network_size
if (!any_goseq_t1only) para <- glue(para, " No enrichment was found.")
tbl <- goseq_t1only_res %>%
dplyr::filter(enriched) %>%
dplyr::select(
gs_name, gs_description, pval, adj_p, starts_with("num"), gene_name
) %>%
reactable(
filterable = TRUE,
showPageSizeOptions = TRUE,
columns = list2(
gs_name = colDef(
name = "Gene Set",
cell = function(value) htmltools::tags$a(
href = gs_url[[value]],
target = "_blank",
str_replace_all(value, "_", " ")
),
html = TRUE,
maxWidth = 150
),
gs_description = colDef(
name = "Description",
cell = function(value) str_trunc(value, width = 150)
),
pval = colDef(
name = "P-Value",
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
adj_p = colDef(
name = "p<sub>adj</sub>", html = TRUE,
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
numDEInCat = colDef(
name = glue("Genes Mapped to {comps[[1]]} Only"),
cell = function(value) comma(value, 1),
maxWidth = 80
),
numInCat = colDef(
name = "Genes Mapped to Both",
cell = function(value) comma(value, 1),
maxWidth = 80
),
gene_name = colDef(
name = "Gene Names",
cell = function(value) with_tooltip(value, width = 150)
)
)
)
div(class = "table",
div(class = "table-header", div(class = "caption", para)),
tbl
)
goseq_t2only_res <- tibble(
gs_name = character(), mapped = numeric(), enriched = logical(),
adj_p = numeric()
)
vec_t2only <- structure(
mapped_ids %in% setdiff(comp_ids[[2]], comp_ids[[1]]),
names = mapped_ids
)
gs_method <- "Hypergeometric"
para <- glue(
"
All {comma(length(setdiff(comp_ids[[2]], comp_ids[[1]])))} genes with a
mapped binding window to only {comps[[2]]} were compared to
the {comma(length(mapped_ids))} genes mapped to either
{glue_collapse(comps, last = ' or ')}, in order to ascertain whether any gene-sets
were likely to be targeted exclusively. No sampling bias offset was
incorporated, instead using a simple Hypergeometric model for enrichment testing.
"
)
any_goseq_t2only <- plot_network_t2only <- FALSE
pwf_t2only <- nullp(
vec_t2only,
bias.data = log10(setNames(all_ids$w, all_ids$gene_id)[mapped_ids]),
plot.fit = TRUE
)
if (sum(pwf_t2only$DEgenes) > 0) {
goseq_t2only_res <- pwf_t2only %>%
goseq(gene2cat = gs_by_geneid, method = gs_method) %>%
dplyr::filter(numDEInCat > 0) %>%
as_tibble() %>%
dplyr::select(
gs_name = category, pval = over_represented_pvalue, starts_with("num")
) %>%
mutate(
adj_p = p.adjust(pval, adj_method),
enriched = adj_p < enrich_alpha
) %>%
dplyr::filter(numDEInCat >= min_sig) %>%
left_join(
dplyr::filter(msigdb, gene_id %in% mapped_ids)[c("gs_name", "gene_name", "gs_url", "gs_description")],
by = "gs_name"
) %>%
chop(gene_name) %>%
mutate(gene_name = vapply(gene_name, paste, character(1), collapse = "; "))
}
any_goseq_t2only <- sum(goseq_t2only_res$adj_p < enrich_alpha) > 0
tg_t2only <- make_tbl_graph(
goseq_t2only_res,
gs = gs_by_gsid %>%
lapply(intersect, rownames(subset(pwf_t2only, DEgenes))) %>%
.[vapply(., length, integer(1)) > 0]
)
plot_network_t2only <- length(tg_t2only) >= min_network_size
if (!any_goseq_t2only) para <- glue(para, " No enrichment was found.")
tbl <- goseq_t2only_res %>%
dplyr::filter(enriched) %>%
dplyr::select(
gs_name, gs_description, pval, adj_p, starts_with("num"), gene_name
) %>%
reactable(
filterable = TRUE,
showPageSizeOptions = TRUE,
columns = list2(
gs_name = colDef(
name = "Gene Set",
cell = function(value) htmltools::tags$a(
href = gs_url[[value]],
target = "_blank",
str_replace_all(value, "_", " ")
),
html = TRUE,
maxWidth = 150
),
gs_description = colDef(
name = "Description",
cell = function(value) str_trunc(value, width = 150)
),
pval = colDef(
name = "P-Value",
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
adj_p = colDef(
name = "p<sub>adj</sub>", html = TRUE,
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
numDEInCat = colDef(
name = glue("Genes Mapped to {comps[[2]]} Only"),
cell = function(value) comma(value, 1),
maxWidth = 80
),
numInCat = colDef(
name = "Genes Mapped to Both",
cell = function(value) comma(value, 1),
maxWidth = 80
),
gene_name = colDef(
name = "Gene Names",
cell = function(value) with_tooltip(value, width = 150)
)
)
)
div(class = "table",
div(class = "table-header", div(class = "caption", para)),
tbl
)
tg_mapped %>%
ggraph(layout = net_layout, weights = oc^2) +
geom_edge_link(aes(width = oc^2, alpha = oc^2)) +
geom_node_point(
aes(fill = -log10(pval), size = numDEInCat),
shape = 21
) +
geom_node_text(
aes(label = label),
colour = "black", size = 3,
data = . %>%
mutate(
label = str_replace_all(label, "_", " ") %>% str_trunc(60) %>% str_wrap(width = 18)
),
repel = TRUE, max.overlaps = max(10, round(length(tg_mapped) / 4, 0)),
bg.color = "white", bg.r = 0.1,
) +
scale_x_continuous(expand = expansion(c(0.1, 0.1))) +
scale_y_continuous(expand = expansion(c(0.1, 0.1))) +
scale_fill_viridis_c(option = "inferno", begin = 0.25) +
scale_size_continuous(range = c(1, 10)) +
scale_edge_width(range = c(1, 6), limits = c(0, 1)) +
scale_edge_alpha(range = c(0.1, 0.4), limits = c(0, 1)) +
guides(edge_alpha= "none") +
labs(size = "Mapped Genes", edge_width = expr(paste(OC^2))) +
theme_void()
Network plot showing gene-sets enriched amongst the overall set of sites with a binding site from either comparison.
tg_t1t2 %>%
ggraph(layout = net_layout, weights = oc^2) +
geom_edge_link(aes(width = oc^2, alpha = oc^2)) +
geom_node_point(
aes(fill = -log10(pval), size = numDEInCat),
shape = 21
) +
geom_node_text(
aes(label = label),
colour = "black", size = 3,
data = . %>%
mutate(
label = str_replace_all(label, "_", " ") %>% str_trunc(60) %>% str_wrap(width = 18)
),
repel = TRUE, max.overlaps = max(10, round(length(tg_t1t2) / 4, 0)),
bg.color = "white", bg.r = 0.1,
) +
scale_x_continuous(expand = expansion(c(0.1, 0.1))) +
scale_y_continuous(expand = expansion(c(0.1, 0.1))) +
scale_fill_viridis_c(option = "inferno", begin = 0.25) +
scale_size_continuous(range = c(1, 10)) +
scale_edge_width(range = c(1, 6), limits = c(0, 1)) +
scale_edge_alpha(range = c(0.1, 0.4), limits = c(0, 1)) +
guides(edge_alpha= "none") +
labs(size = "Mapped Genes", edge_width = expr(paste(OC^2))) +
theme_void()
Network plot showing gene-sets enriched amongst the overall set of sites with a binding site for both AR and H3K27ac.
tg_t1only %>%
ggraph(layout = net_layout, weights = oc^2) +
geom_edge_link(aes(width = oc^2, alpha = oc^2)) +
geom_node_point(
aes(fill = -log10(pval), size = numDEInCat),
shape = 21
) +
geom_node_text(
aes(label = label),
colour = "black", size = 3,
data = . %>%
mutate(
label = str_replace_all(label, "_", " ") %>% str_trunc(60) %>% str_wrap(width = 18)
),
repel = TRUE, max.overlaps = max(10, round(length(tg_t1only) / 4, 0)),
bg.color = "white", bg.r = 0.1,
) +
scale_x_continuous(expand = expansion(c(0.1, 0.1))) +
scale_y_continuous(expand = expansion(c(0.1, 0.1))) +
scale_fill_viridis_c(option = "inferno", begin = 0.25) +
scale_size_continuous(range = c(1, 10)) +
scale_edge_width(range = c(1, 6), limits = c(0, 1)) +
scale_edge_alpha(range = c(0.1, 0.4), limits = c(0, 1)) +
guides(edge_alpha= "none") +
labs(size = "Mapped Genes", edge_width = expr(paste(OC^2))) +
theme_void()
Network plot showing all gene-sets enriched amongst the overall set of sites with a binding site for AR only.
tg_t2only %>%
ggraph(layout = net_layout, weights = oc^2) +
geom_edge_link(aes(width = oc^2, alpha = oc^2)) +
geom_node_point(
aes(fill = -log10(pval), size = numDEInCat),
shape = 21
) +
geom_node_text(
aes(label = label),
colour = "black", size = 3,
data = . %>%
mutate(
label = str_replace_all(label, "_", " ") %>% str_trunc(60) %>% str_wrap(width = 18)
),
repel = TRUE, max.overlaps = max(10, round(length(tg_t2only) / 4, 0)),
bg.color = "white", bg.r = 0.1,
) +
scale_x_continuous(expand = expansion(c(0.1, 0.1))) +
scale_y_continuous(expand = expansion(c(0.1, 0.1))) +
scale_fill_viridis_c(option = "inferno", begin = 0.25) +
scale_size_continuous(range = c(1, 10)) +
scale_edge_width(range = c(1, 6), limits = c(0, 1)) +
scale_edge_alpha(range = c(0.1, 0.4), limits = c(0, 1)) +
guides(edge_alpha= "none") +
labs(size = "Mapped Genes", edge_width = expr(paste(OC^2))) +
theme_void()
Network plot showing all gene-sets enriched amongst the overall set of sites with a binding site for H3K27ac only.
n_peaks <- structure(all_ids$n_windows, names = all_ids$gene_id)[mapped_ids]
goseq_group_res <- group_ids %>%
.[str_detect(names(.), "Up|Down")] %>%
mclapply(
function(x) {
vec <- structure(mapped_ids %in% x, names = mapped_ids)
pwf <- nullp(vec, bias.data = n_peaks, plot.fit = FALSE)
res <- tibble(adj_p = numeric())
gs_method <- "Hypergeometric"
if (sum(pwf$DEgenes) > 0) {
res <- goseq(pwf, gene2cat = gs_by_geneid, method = gs_method) %>%
as_tibble() %>%
dplyr::filter(numDEInCat > 0) %>%
mutate(adj_p = p.adjust(over_represented_pvalue, adj_method)) %>%
dplyr::filter(numDEInCat >= min_sig) %>%
dplyr::select(
gs_name = category, pval = over_represented_pvalue, adj_p,
ends_with("InCat")
) %>%
left_join(
dplyr::filter(msigdb, gene_id %in% x)[c("gs_name", "gene_name", "gs_url", "gs_description")],
by = "gs_name"
) %>%
chop(gene_name) %>%
mutate(
gene_name = vapply(gene_name, paste, character(1), collapse = "; ")
)
}
res
},
mc.cores = threads
) %>%
## This step at the end basically removes any groups with no results
lapply(list) %>%
as_tibble() %>%
pivot_longer(everything(), names_to = "group") %>%
unnest(everything()) %>%
split(.$group)
goseq_group_sig <- goseq_group_res %>%
lapply(dplyr::filter, adj_p < enrich_alpha) %>%
.[vapply(., nrow, integer(1)) > 0]
tg_group_res <- names(goseq_group_sig) %>%
lapply(
function(x) {
make_tbl_graph(
goseq_group_sig[[x]],
gs_by_gsid[goseq_group_sig[[x]]$gs_name] %>%
lapply(intersect, group_ids[[x]])
)
}
) %>%
setNames(names(goseq_group_sig)) %>%
.[vapply(., length, integer(1)) >= min_network_size]
The same MSigDB gene sets were used for associating any pathways with the combined changes in both AR and H3K27ac. The ‘universe’ of genes was defined as all 26,098 gene ids with a binding window mapped from either target. No sampling bias was included, with test results using a simple Hypergeometric Distribution.
htmltools::tagList(
names(goseq_group_sig) %>%
mclapply(
function(x) {
cp <- glue(
"
All {comma(length(group_ids[[x]]), 1)} genes mapped to binding windows
with the pattern {x} were compared to the complete set of
{comma(length(mapped_ids), 1)} genes mapped to at least one binding
window. {nrow(goseq_group_sig[[x]])} gene-sets were considered as
enriched using a threshold of {enrich_alpha} for adjusted-p-values.
"
)
htmltools::div(
htmltools::div(
id = x %>%
str_to_lower %>%
str_replace_all(" ", "-") %>%
str_replace_all("-+", "-"),
class="section level4",
htmltools::h4(class = "tabset", x),
htmltools::tags$em(cp),
goseq_group_sig[[x]] %>%
separate(group, into = comps, sep = " - ") %>%
dplyr::select(
all_of(comps), gs_name, gs_description, pval, adj_p, everything()
) %>%
reactable(
filterable = TRUE,
showPageSizeOptions = TRUE,
columns = list2(
"{comps[[1]]}" := colDef(
cell = function(value) {
value <- str_extract(value, "Up|Down|Unchanged|Undetected")
if (str_detect(value, "Up")) value <- paste(value, "\u21E7")
if (str_detect(value, "Down")) value <- paste(value, "\u21E9")
paste(targets[[1]], value)
},
style = function(value) {
colour <- case_when(
str_detect(value, "Up") ~ colours$direction[["up"]],
str_detect(value, "Down") ~ colours$direction[["down"]],
str_detect(value, "Unchanged") ~ colours$direction[["unchanged"]],
str_detect(value, "Undetected") ~ colours$direction[["undetected"]]
)
list(color = colour)
},
maxWidth = 120
),
"{comps[[2]]}" := colDef(
cell = function(value) {
value <- str_extract(value, "Up|Down|Unchanged|Undetected")
if (str_detect(value, "Up")) value <- paste(value, "\u21E7")
if (str_detect(value, "Down")) value <- paste(value, "\u21E9")
paste(targets[[2]], value)
},
style = function(value) {
colour <- case_when(
str_detect(value, "Up") ~ colours$direction[["up"]],
str_detect(value, "Down") ~ colours$direction[["down"]],
str_detect(value, "Unchanged") ~ colours$direction[["unchanged"]],
str_detect(value, "Undetected") ~ colours$direction[["undetected"]]
)
list(color = colour)
},
maxWidth = 120
),
gs_name = colDef(
name = "Gene Set",
cell = function(value) {
htmltools::tags$a(
href = gs_url[[value]], target = "_blank",
str_replace_all(value, "_", " ")
)
},
html = TRUE,
maxWidth = 150
),
gs_description = colDef(
name = "Description",
cell = function(value) str_trunc(value, width = 150)
),
gs_url = colDef(show = FALSE),
pval = colDef(
name = "P-Value",
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
adj_p = colDef(
name = "p<sub>adj</sub>", html = TRUE,
cell = function(value) ifelse(
value < 0.001,
sprintf("%.2e", value),
sprintf("%.3f", value)
),
maxWidth = 80
),
numDEInCat = colDef(
name = "Mapped Genes",
cell = function(value) comma(value, 1),
maxWidth = 80
),
numInCat = colDef(
name = "Genes in Gene-Set",
cell = function(value) comma(value, 1),
maxWidth = 80
),
gene_name = colDef(
name = "Gene Names",
cell = function(value) with_tooltip(value, width = 150)
)
)
)
)
)
},
mc.cores = threads
)
)
goseq_group_sig %>%
bind_rows() %>%
arrange(pval) %>%
left_join(n_mapped, by = c("group" = "status")) %>%
mutate(
gs_name = fct_inorder(gs_name) %>%
fct_relabel(str_replace_all, "_", " ") %>%
fct_relabel(str_trunc, width = 60),
prop = numDEInCat / mapped,
group = factor(group, levels = n_mapped$status)
) %>%
droplevels() %>%
ggplot(aes(group, fct_rev(gs_name))) +
geom_point(aes(size = prop, fill = -log10(pval)), shape = 21) +
geom_ysidecol(
aes(x = numInCat), data = . %>% distinct(gs_name, numInCat),
) +
geom_xsidecol(
aes(y = mapped),
data = n_mapped %>%
dplyr::filter(status %in% names(goseq_group_sig)) %>%
dplyr::rename(group = status),
width = 0.5
) +
scale_x_discrete(labels = label_wrap(10)) +
scale_ysidex_continuous(expand = expansion(c(0, 0.15))) +
scale_xsidey_continuous(
expand = expansion(c(0, 0.15))
) +
scale_fill_viridis_c(option = "inferno", begin = 0.2) +
scale_size(range = c(0, 5), labels = percent) +
labs(
x = c(), y = c(), size = "% Mapped\nGenes", fill = expr(paste(-log[10], "p"))
) +
theme(
panel.grid = element_blank(),
axis.text = element_text(size = 8),
legend.position = "right",
ggside.panel.scale.x = 0.2,
ggside.panel.scale.y = 0.3,
ggside.axis.text.x.bottom = element_text(angle = 270, hjust = 0, vjust = 0.5)
)
Combined enrichment across all groups, showing only significant results for enrichment. The top panel shows how many genes were mapped to sites for each group, whilst the right panel shows gene set size. Point sizes indicate the proportion of mapped genes which are from each pathway.
htmltools::tagList(
names(tg_group_res) %>%
mclapply(
function(x) {
## Export the image
img_out <- file.path(
fig_path,
x %>%
str_replace_all(" ", "_") %>%
str_replace_all("_-_", "-") %>%
paste0("_network.", fig_type)
)
fig_fun(
filename = img_out,
width = knitr::opts_current$get("fig.width"),
height = knitr::opts_current$get("fig.height")
)
p <- tg_group_res[[x]] %>%
ggraph(layout = net_layout, weights =oc^2) +
geom_edge_link(aes(width = oc^2, alpha = oc^2)) +
geom_node_point(
aes(fill = -log10(pval), size = numDEInCat),
shape = 21
) +
geom_node_text(
aes(label = label),
colour = "black", size = 3,
data = . %>%
mutate(
label = str_replace_all(label, "_", " ") %>% str_trunc(60) %>% str_wrap(width = 18)
),
repel = TRUE,
max.overlaps = max(10, round(length(tg_group_res[[x]]) / 4, 0)),
bg.color = "white", bg.r = 0.1
) +
scale_x_continuous(expand = expansion(c(0.1, 0.1))) +
scale_y_continuous(expand = expansion(c(0.1, 0.1))) +
scale_fill_viridis_c(option = "inferno", begin = 0.25) +
scale_size_continuous(range = c(1, 10)) +
scale_edge_width(range = c(1, 6), limits = c(0, 1)) +
scale_edge_alpha(range = c(0.1, 0.4), limits = c(0, 1)) +
guides(edge_alpha= "none") +
labs(size = "Mapped Genes", edge_width = expr(paste(OC^2))) +
theme_void()
print(p)
dev.off()
## Create html tags
fig_link <- str_extract(img_out, "assets.+")
cp <- htmltools::tags$em(
glue(
"
Network plot showing enriched pathways mapped to genes
associated with {x}.
"
)
)
htmltools::div(
htmltools::div(
id = img_out %>%
basename() %>%
str_remove_all(glue(".{fig_type}$")) %>%
str_to_lower() %>%
str_replace_all("_", "-"),
class="section level4",
htmltools::h4(x),
htmltools::div(
class = "figure", style = "text-align: center",
htmltools::img(src = fig_link, width = "100%"),
htmltools::tags$caption(cp)
)
)
)
},
mc.cores = threads
)
)
write_rds(all_windows, all_out$results, compress = "gz")
save.image(all_out$renv)
all_windows %>%
as_tibble() %>%
dplyr::select(-detected) %>%
unnest(all_of("gene_id")) %>% # This ensures correct id2gene mappings
mutate(gene_name = id2gene[gene_id]) %>%
dplyr::select(
starts_with("gene"), range, any_of(c("region", "feature", "hic")),
) %>%
write_csv(
gzfile(all_out$csv)
)
write_csv(de_genes_both_comps, all_out$de_genes)
goseq_group_sig %>%
bind_rows() %>%
write_csv(all_out$goseq)
cmn_res %>%
bind_rows() %>%
mutate(
leadingEdge = vapply(leadingEdge, paste, character(1), collapse = "; "),
gs_url = gs_url[gs_name]
) %>%
write_csv(all_out$rna_enrichment)
During this workflow, the following files were exported:
R version 4.2.2 (2022-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
locale: LC_CTYPE=en_AU.UTF-8, LC_NUMERIC=C, LC_TIME=en_AU.UTF-8, LC_COLLATE=en_AU.UTF-8, LC_MONETARY=en_AU.UTF-8, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C
attached base packages: parallel, grid, stats4, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: tidygraph(v.1.2.3), ggraph(v.2.1.0), extraChIPs(v.1.2.4), BiocParallel(v.1.32.5), GenomicInteractions(v.1.32.0), InteractionSet(v.1.26.0), SummarizedExperiment(v.1.28.0), Biobase(v.2.58.0), MatrixGenerics(v.1.10.0), matrixStats(v.1.0.0), goseq(v.1.50.0), geneLenDataBase(v.1.34.0), BiasedUrn(v.2.0.10), htmltools(v.0.5.5), msigdbr(v.7.5.1), ggside(v.0.2.2), rlang(v.1.1.1), ggrepel(v.0.9.3), scales(v.1.2.1), magrittr(v.2.0.3), UpSetR(v.1.4.0), VennDiagram(v.1.7.3), futile.logger(v.1.4.3), rtracklayer(v.1.58.0), plyranges(v.1.18.0), GenomicRanges(v.1.50.0), GenomeInfoDb(v.1.34.9), IRanges(v.2.32.0), S4Vectors(v.0.36.0), BiocGenerics(v.0.44.0), reactable(v.0.4.4), yaml(v.2.3.7), pander(v.0.6.5), glue(v.1.6.2), lubridate(v.1.9.2), forcats(v.1.0.0), stringr(v.1.5.0), dplyr(v.1.1.2), purrr(v.1.0.1), readr(v.2.1.4), tidyr(v.1.3.0), tibble(v.3.2.1), ggplot2(v.3.4.2) and tidyverse(v.2.0.0)
loaded via a namespace (and not attached): utf8(v.1.2.3), tidyselect(v.1.2.0), RSQLite(v.2.3.1), AnnotationDbi(v.1.60.0), htmlwidgets(v.1.6.2), munsell(v.0.5.0), codetools(v.0.2-19), interp(v.1.1-4), withr(v.2.5.0), colorspace(v.2.1-0), filelock(v.1.0.2), highr(v.0.10), knitr(v.1.43), rstudioapi(v.0.14), labeling(v.0.4.2), GenomeInfoDbData(v.1.2.9), polyclip(v.1.10-4), farver(v.2.1.1), bit64(v.4.0.5), rprojroot(v.2.0.3), vctrs(v.0.6.3), generics(v.0.1.3), lambda.r(v.1.2.4), xfun(v.0.39), csaw(v.1.32.0), biovizBase(v.1.46.0), timechange(v.0.2.0), BiocFileCache(v.2.6.0), R6(v.2.5.1), doParallel(v.1.0.17), graphlayouts(v.1.0.0), clue(v.0.3-64), locfit(v.1.5-9.8), AnnotationFilter(v.1.22.0), bitops(v.1.0-7), cachem(v.1.0.8), DelayedArray(v.0.24.0), vroom(v.1.6.3), BiocIO(v.1.8.0), nnet(v.7.3-19), gtable(v.0.3.3), ensembldb(v.2.22.0), GlobalOptions(v.0.1.2), splines(v.4.2.2), lazyeval(v.0.2.2), dichromat(v.2.0-0.1), broom(v.1.0.5), checkmate(v.2.2.0), crosstalk(v.1.2.0), GenomicFeatures(v.1.50.2), backports(v.1.4.1), Hmisc(v.5.1-0), EnrichedHeatmap(v.1.27.2), tools(v.4.2.2), ellipsis(v.0.3.2), jquerylib(v.0.1.4), RColorBrewer(v.1.1-3), Rcpp(v.1.0.10), plyr(v.1.8.8), base64enc(v.0.1-3), progress(v.1.2.2), zlibbioc(v.1.44.0), RCurl(v.1.98-1.12), prettyunits(v.1.1.1), rpart(v.4.1.19), deldir(v.1.0-9), viridis(v.0.6.3), GetoptLong(v.1.0.5), reactR(v.0.4.4), cluster(v.2.1.4), here(v.1.0.1), ComplexUpset(v.1.3.3), data.table(v.1.14.8), futile.options(v.1.0.1), circlize(v.0.4.15), ProtGenerics(v.1.30.0), hms(v.1.1.3), patchwork(v.1.1.2), evaluate(v.0.21), XML(v.3.99-0.14), jpeg(v.0.1-10), shape(v.1.4.6), gridExtra(v.2.3), compiler(v.4.2.2), biomaRt(v.2.54.0), crayon(v.1.5.2), mgcv(v.1.8-42), tzdb(v.0.4.0), Formula(v.1.2-5), DBI(v.1.1.3), tweenr(v.2.0.2), formatR(v.1.14), dbplyr(v.2.3.2), ComplexHeatmap(v.2.14.0), MASS(v.7.3-60), rappdirs(v.0.3.3), babelgene(v.22.9), Matrix(v.1.5-4.1), cli(v.3.6.1), metapod(v.1.6.0), Gviz(v.1.42.0), igraph(v.1.4.2), pkgconfig(v.2.0.3), GenomicAlignments(v.1.34.0), foreign(v.0.8-84), xml2(v.1.3.3), foreach(v.1.5.2), bslib(v.0.5.0), XVector(v.0.38.0), VariantAnnotation(v.1.44.0), digest(v.0.6.31), Biostrings(v.2.66.0), rmarkdown(v.2.22), htmlTable(v.2.4.1), edgeR(v.3.40.0), restfulr(v.0.0.15), curl(v.5.0.1), Rsamtools(v.2.14.0), rjson(v.0.2.21), lifecycle(v.1.0.3), nlme(v.3.1-162), jsonlite(v.1.8.5), viridisLite(v.0.4.2), limma(v.3.54.0), BSgenome(v.1.66.3), fansi(v.1.0.4), pillar(v.1.9.0), lattice(v.0.21-8), KEGGREST(v.1.38.0), fastmap(v.1.1.1), httr(v.1.4.6), GO.db(v.3.16.0), png(v.0.1-8), iterators(v.1.0.14), bit(v.4.0.5), ggforce(v.0.4.1), stringi(v.1.7.12), sass(v.0.4.6), blob(v.1.2.4), latticeExtra(v.0.6-30) and memoise(v.2.0.1)